diff mbox series

[v14,03/10] accel: collecting TB execution count

Message ID 20230530083526.2174430-4-fei2.wu@intel.com (mailing list archive)
State New, archived
Headers show
Series TCG code quality tracking | expand

Commit Message

Wu, Fei May 30, 2023, 8:35 a.m. UTC
From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
 accel/tcg/cpu-exec.c          |  6 ++++++
 accel/tcg/tb-stats.c          |  6 ++++++
 accel/tcg/tcg-runtime.c       |  1 +
 accel/tcg/translate-all.c     |  7 +++++--
 accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
 include/exec/gen-icount.h     |  1 +
 include/exec/tb-stats-flags.h |  5 +++++
 include/exec/tb-stats.h       | 13 +++++++++++++
 8 files changed, 62 insertions(+), 2 deletions(-)

Comments

Richard Henderson June 1, 2023, 12:05 a.m. UTC | #1
On 5/30/23 01:35, Fei Wu wrote:
> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
> 
> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
> enabled, then we instrument the start code of this TB
> to atomically count the number of times it is executed.
> We count both the number of "normal" executions and atomic
> executions of a TB.
> 
> The execution count of the TB is stored in its respective
> TBS.
> 
> All TBStatistics are created by default with the flags from
> default_tbstats_flag.
> 
> [Richard Henderson created the inline gen_tb_exec_count]
> 
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
> [AJB: Fix author]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> ---
>   accel/tcg/cpu-exec.c          |  6 ++++++
>   accel/tcg/tb-stats.c          |  6 ++++++
>   accel/tcg/tcg-runtime.c       |  1 +
>   accel/tcg/translate-all.c     |  7 +++++--
>   accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>   include/exec/gen-icount.h     |  1 +
>   include/exec/tb-stats-flags.h |  5 +++++
>   include/exec/tb-stats.h       | 13 +++++++++++++
>   8 files changed, 62 insertions(+), 2 deletions(-)
> 
> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> index 0e741960da..c0d8f26237 100644
> --- a/accel/tcg/cpu-exec.c
> +++ b/accel/tcg/cpu-exec.c
> @@ -25,6 +25,7 @@
>   #include "trace.h"
>   #include "disas/disas.h"
>   #include "exec/exec-all.h"
> +#include "exec/tb-stats.h"
>   #include "tcg/tcg.h"
>   #include "qemu/atomic.h"
>   #include "qemu/rcu.h"
> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>               mmap_unlock();
>           }
>   
> +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
> +            tb->tb_stats->executions.atomic++;
> +        }
> +
>           cpu_exec_enter(cpu);
> +
>           /* execute the generated code */
>           trace_exec_tb(tb, pc);
>           cpu_tb_exec(cpu, tb, &tb_exit);
> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
> index f988bd8a31..143a52ef5c 100644
> --- a/accel/tcg/tb-stats.c
> +++ b/accel/tcg/tb-stats.c
> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>   };
>   
>   static enum TBStatsStatus tcg_collect_tb_stats;
> +static uint32_t default_tbstats_flag;
>   
>   void init_tb_stats_htable(void)
>   {
> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>   {
>       return tcg_collect_tb_stats == TB_STATS_PAUSED;
>   }
> +
> +uint32_t get_default_tbstats_flag(void)
> +{
> +    return default_tbstats_flag;
> +}

What is the purpose of this function, instead of a global variable?
What is the meaning of 'default' in its name?

> @@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
>       new_stats->pc = pc;
>       new_stats->cs_base = cs_base;
>       new_stats->flags = flags;
> +    new_stats->stats_enabled = get_default_tbstats_flag();

Is this merely to record how we have generated a given TB?
What is the purpose of this flag over the global variable?

> diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
> index 87ee3d902e..fa71eb6f0c 100644
> --- a/include/exec/tb-stats-flags.h
> +++ b/include/exec/tb-stats-flags.h
> @@ -11,6 +11,9 @@
>   #ifndef TB_STATS_FLAGS
>   #define TB_STATS_FLAGS
>   
> +#define TB_NOTHING    (1 << 0)
> +#define TB_EXEC_STATS (1 << 1)

Why is NOTHING a non-zero flag?

> --- a/include/exec/tb-stats.h
> +++ b/include/exec/tb-stats.h
> @@ -31,6 +31,9 @@
>   #include "exec/tb-stats-flags.h"
>   #include "tcg/tcg.h"
>   
> +#define tb_stats_enabled(tb, JIT_STATS) \
> +    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))

Inline function, though again, why stats_enabled vs global variable?


r~
Wu, Fei June 1, 2023, 5:44 a.m. UTC | #2
On 6/1/2023 8:05 AM, Richard Henderson wrote:
> On 5/30/23 01:35, Fei Wu wrote:
>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>
>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>> enabled, then we instrument the start code of this TB
>> to atomically count the number of times it is executed.
>> We count both the number of "normal" executions and atomic
>> executions of a TB.
>>
>> The execution count of the TB is stored in its respective
>> TBS.
>>
>> All TBStatistics are created by default with the flags from
>> default_tbstats_flag.
>>
>> [Richard Henderson created the inline gen_tb_exec_count]
>>
>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>> [AJB: Fix author]
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> ---
>>   accel/tcg/cpu-exec.c          |  6 ++++++
>>   accel/tcg/tb-stats.c          |  6 ++++++
>>   accel/tcg/tcg-runtime.c       |  1 +
>>   accel/tcg/translate-all.c     |  7 +++++--
>>   accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>   include/exec/gen-icount.h     |  1 +
>>   include/exec/tb-stats-flags.h |  5 +++++
>>   include/exec/tb-stats.h       | 13 +++++++++++++
>>   8 files changed, 62 insertions(+), 2 deletions(-)
>>
>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>> index 0e741960da..c0d8f26237 100644
>> --- a/accel/tcg/cpu-exec.c
>> +++ b/accel/tcg/cpu-exec.c
>> @@ -25,6 +25,7 @@
>>   #include "trace.h"
>>   #include "disas/disas.h"
>>   #include "exec/exec-all.h"
>> +#include "exec/tb-stats.h"
>>   #include "tcg/tcg.h"
>>   #include "qemu/atomic.h"
>>   #include "qemu/rcu.h"
>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>               mmap_unlock();
>>           }
>>   +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>> +            tb->tb_stats->executions.atomic++;
>> +        }
>> +
>>           cpu_exec_enter(cpu);
>> +
>>           /* execute the generated code */
>>           trace_exec_tb(tb, pc);
>>           cpu_tb_exec(cpu, tb, &tb_exit);
>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>> index f988bd8a31..143a52ef5c 100644
>> --- a/accel/tcg/tb-stats.c
>> +++ b/accel/tcg/tb-stats.c
>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>   };
>>     static enum TBStatsStatus tcg_collect_tb_stats;
>> +static uint32_t default_tbstats_flag;
>>     void init_tb_stats_htable(void)
>>   {
>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>   {
>>       return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>   }
>> +
>> +uint32_t get_default_tbstats_flag(void)
>> +{
>> +    return default_tbstats_flag;
>> +}
> 
> What is the purpose of this function, instead of a global variable?
> What is the meaning of 'default' in its name?
> 
tbs have their specific settings, e.g. after 'filter' cmd:
* the last_search tbs has their stats_enabled kept
* tbs not in the list sets their flag to TB_PAUSED

I guess 'default' means the flag for creating new tbstats.

>> @@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t
>> phys_pc, target_ulong pc,
>>       new_stats->pc = pc;
>>       new_stats->cs_base = cs_base;
>>       new_stats->flags = flags;
>> +    new_stats->stats_enabled = get_default_tbstats_flag();
> 
> Is this merely to record how we have generated a given TB?
> What is the purpose of this flag over the global variable?
> 
see above.

>> diff --git a/include/exec/tb-stats-flags.h
>> b/include/exec/tb-stats-flags.h
>> index 87ee3d902e..fa71eb6f0c 100644
>> --- a/include/exec/tb-stats-flags.h
>> +++ b/include/exec/tb-stats-flags.h
>> @@ -11,6 +11,9 @@
>>   #ifndef TB_STATS_FLAGS
>>   #define TB_STATS_FLAGS
>>   +#define TB_NOTHING    (1 << 0)
>> +#define TB_EXEC_STATS (1 << 1)
> 
> Why is NOTHING a non-zero flag?
> 
yes, it might looks better. But there is no correctness issue either as
it checks if the specific bit is enabled during collecting stats.

>> --- a/include/exec/tb-stats.h
>> +++ b/include/exec/tb-stats.h
>> @@ -31,6 +31,9 @@
>>   #include "exec/tb-stats-flags.h"
>>   #include "tcg/tcg.h"
>>   +#define tb_stats_enabled(tb, JIT_STATS) \
>> +    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
> 
> Inline function, though again, why stats_enabled vs global variable?
> 
will inline it.

Thanks,
Fei.

> 
> r~
Richard Henderson June 1, 2023, 2:03 p.m. UTC | #3
On 5/31/23 22:44, Wu, Fei wrote:
> On 6/1/2023 8:05 AM, Richard Henderson wrote:
>> On 5/30/23 01:35, Fei Wu wrote:
>>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>>
>>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>>> enabled, then we instrument the start code of this TB
>>> to atomically count the number of times it is executed.
>>> We count both the number of "normal" executions and atomic
>>> executions of a TB.
>>>
>>> The execution count of the TB is stored in its respective
>>> TBS.
>>>
>>> All TBStatistics are created by default with the flags from
>>> default_tbstats_flag.
>>>
>>> [Richard Henderson created the inline gen_tb_exec_count]
>>>
>>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>>> [AJB: Fix author]
>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> ---
>>>    accel/tcg/cpu-exec.c          |  6 ++++++
>>>    accel/tcg/tb-stats.c          |  6 ++++++
>>>    accel/tcg/tcg-runtime.c       |  1 +
>>>    accel/tcg/translate-all.c     |  7 +++++--
>>>    accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>>    include/exec/gen-icount.h     |  1 +
>>>    include/exec/tb-stats-flags.h |  5 +++++
>>>    include/exec/tb-stats.h       | 13 +++++++++++++
>>>    8 files changed, 62 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>>> index 0e741960da..c0d8f26237 100644
>>> --- a/accel/tcg/cpu-exec.c
>>> +++ b/accel/tcg/cpu-exec.c
>>> @@ -25,6 +25,7 @@
>>>    #include "trace.h"
>>>    #include "disas/disas.h"
>>>    #include "exec/exec-all.h"
>>> +#include "exec/tb-stats.h"
>>>    #include "tcg/tcg.h"
>>>    #include "qemu/atomic.h"
>>>    #include "qemu/rcu.h"
>>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>>                mmap_unlock();
>>>            }
>>>    +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>>> +            tb->tb_stats->executions.atomic++;
>>> +        }
>>> +
>>>            cpu_exec_enter(cpu);
>>> +
>>>            /* execute the generated code */
>>>            trace_exec_tb(tb, pc);
>>>            cpu_tb_exec(cpu, tb, &tb_exit);
>>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>>> index f988bd8a31..143a52ef5c 100644
>>> --- a/accel/tcg/tb-stats.c
>>> +++ b/accel/tcg/tb-stats.c
>>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>>    };
>>>      static enum TBStatsStatus tcg_collect_tb_stats;
>>> +static uint32_t default_tbstats_flag;
>>>      void init_tb_stats_htable(void)
>>>    {
>>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>>    {
>>>        return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>>    }
>>> +
>>> +uint32_t get_default_tbstats_flag(void)
>>> +{
>>> +    return default_tbstats_flag;
>>> +}
>>
>> What is the purpose of this function, instead of a global variable?
>> What is the meaning of 'default' in its name?
>>
> tbs have their specific settings, e.g. after 'filter' cmd:
> * the last_search tbs has their stats_enabled kept
> * tbs not in the list sets their flag to TB_PAUSED

How does this affect anything at all?

We are not *checking* the tb->tb_stats->stats_enabled bit except at code generation time, 
not code execution time.  Therefore nothing ever reads the TB_PAUSED bit (or, 
correspondingly, the clearing of the other bits).  The setting of the bit is permanent.

> yes, it might looks better. But there is no correctness issue either as
> it checks if the specific bit is enabled during collecting stats.

No, it does not.  See above.


r~
Wu, Fei June 2, 2023, 1:54 a.m. UTC | #4
On 6/1/2023 10:03 PM, Richard Henderson wrote:
> On 5/31/23 22:44, Wu, Fei wrote:
>> On 6/1/2023 8:05 AM, Richard Henderson wrote:
>>> On 5/30/23 01:35, Fei Wu wrote:
>>>> From: "Vanderson M. do Rosario" <vandersonmr2@gmail.com>
>>>>
>>>> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
>>>> enabled, then we instrument the start code of this TB
>>>> to atomically count the number of times it is executed.
>>>> We count both the number of "normal" executions and atomic
>>>> executions of a TB.
>>>>
>>>> The execution count of the TB is stored in its respective
>>>> TBS.
>>>>
>>>> All TBStatistics are created by default with the flags from
>>>> default_tbstats_flag.
>>>>
>>>> [Richard Henderson created the inline gen_tb_exec_count]
>>>>
>>>> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
>>>> Message-Id: <20190829173437.5926-3-vandersonmr2@gmail.com>
>>>> [AJB: Fix author]
>>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>> ---
>>>>    accel/tcg/cpu-exec.c          |  6 ++++++
>>>>    accel/tcg/tb-stats.c          |  6 ++++++
>>>>    accel/tcg/tcg-runtime.c       |  1 +
>>>>    accel/tcg/translate-all.c     |  7 +++++--
>>>>    accel/tcg/translator.c        | 25 +++++++++++++++++++++++++
>>>>    include/exec/gen-icount.h     |  1 +
>>>>    include/exec/tb-stats-flags.h |  5 +++++
>>>>    include/exec/tb-stats.h       | 13 +++++++++++++
>>>>    8 files changed, 62 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
>>>> index 0e741960da..c0d8f26237 100644
>>>> --- a/accel/tcg/cpu-exec.c
>>>> +++ b/accel/tcg/cpu-exec.c
>>>> @@ -25,6 +25,7 @@
>>>>    #include "trace.h"
>>>>    #include "disas/disas.h"
>>>>    #include "exec/exec-all.h"
>>>> +#include "exec/tb-stats.h"
>>>>    #include "tcg/tcg.h"
>>>>    #include "qemu/atomic.h"
>>>>    #include "qemu/rcu.h"
>>>> @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
>>>>                mmap_unlock();
>>>>            }
>>>>    +        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
>>>> +            tb->tb_stats->executions.atomic++;
>>>> +        }
>>>> +
>>>>            cpu_exec_enter(cpu);
>>>> +
>>>>            /* execute the generated code */
>>>>            trace_exec_tb(tb, pc);
>>>>            cpu_tb_exec(cpu, tb, &tb_exit);
>>>> diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
>>>> index f988bd8a31..143a52ef5c 100644
>>>> --- a/accel/tcg/tb-stats.c
>>>> +++ b/accel/tcg/tb-stats.c
>>>> @@ -22,6 +22,7 @@ enum TBStatsStatus {
>>>>    };
>>>>      static enum TBStatsStatus tcg_collect_tb_stats;
>>>> +static uint32_t default_tbstats_flag;
>>>>      void init_tb_stats_htable(void)
>>>>    {
>>>> @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
>>>>    {
>>>>        return tcg_collect_tb_stats == TB_STATS_PAUSED;
>>>>    }
>>>> +
>>>> +uint32_t get_default_tbstats_flag(void)
>>>> +{
>>>> +    return default_tbstats_flag;
>>>> +}
>>>
>>> What is the purpose of this function, instead of a global variable?
>>> What is the meaning of 'default' in its name?
>>>
>> tbs have their specific settings, e.g. after 'filter' cmd:
>> * the last_search tbs has their stats_enabled kept
>> * tbs not in the list sets their flag to TB_PAUSED
> 
> How does this affect anything at all?
> 
> We are not *checking* the tb->tb_stats->stats_enabled bit except at code
> generation time, not code execution time.  Therefore nothing ever reads
> the TB_PAUSED bit (or, correspondingly, the clearing of the other
> bits).  The setting of the bit is permanent.
> 
At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
the question is whether FILTER is necessary at all? If not, we can
remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.

Thanks,
Fei.

>> yes, it might looks better. But there is no correctness issue either as
>> it checks if the specific bit is enabled during collecting stats.
> 
> No, it does not.  See above.
> 
> 
> r~
Richard Henderson June 2, 2023, 4:02 a.m. UTC | #5
On 6/1/23 18:54, Wu, Fei wrote:
>> We are not *checking* the tb->tb_stats->stats_enabled bit except at code
>> generation time, not code execution time.  Therefore nothing ever reads
>> the TB_PAUSED bit (or, correspondingly, the clearing of the other
>> bits).  The setting of the bit is permanent.
>>
> At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
> the question is whether FILTER is necessary at all? If not, we can
> remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.

Let's start simpler and remove FILTER and PAUSE.


r~
diff mbox series

Patch

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..c0d8f26237 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@ 
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@  void cpu_exec_step_atomic(CPUState *cpu)
             mmap_unlock();
         }
 
+        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+            tb->tb_stats->executions.atomic++;
+        }
+
         cpu_exec_enter(cpu);
+
         /* execute the generated code */
         trace_exec_tb(tb, pc);
         cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f988bd8a31..143a52ef5c 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@  enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@  bool tb_stats_collection_paused(void)
 {
     return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+    return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..11c6192064 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@ 
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bb9483785e..dadf49954f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -295,6 +295,7 @@  static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
     new_stats->pc = pc;
     new_stats->cs_base = cs_base;
     new_stats->flags = flags;
+    new_stats->stats_enabled = get_default_tbstats_flag();
 
     /*
      * All initialisation must be complete before we insert into qht
@@ -382,9 +383,11 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
     /*
      * We want to fetch the stats structure before we start code
      * generation so we can count interesting things about this
-     * generation.
+     * generation. If dfilter is in effect we will only collect stats
+     * for the specified range.
      */
-    if (tb_stats_collection_enabled()) {
+    if (tb_stats_collection_enabled() &&
+        qemu_log_in_addr_range(tb->pc)) {
         tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
     } else {
         tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 6120ef2a92..80ffbfb455 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,6 +11,7 @@ 
 #include "qemu/error-report.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
@@ -18,6 +19,29 @@ 
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+    if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+        TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+        tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+        if (sizeof(tb->tb_stats->executions.normal) == 4) {
+            TCGv_i32 t = tcg_temp_ebb_new_i32();
+            tcg_gen_ld_i32(t, ptr, 0);
+            tcg_gen_addi_i32(t, t, 1);
+            tcg_gen_st_i32(t, ptr, 0);
+            tcg_temp_free_i32(t);
+        } else {
+            TCGv_i64 t = tcg_temp_ebb_new_i64();
+            tcg_gen_ld_i64(t, ptr, 0);
+            tcg_gen_addi_i64(t, t, 1);
+            tcg_gen_st_i64(t, ptr, 0);
+            tcg_temp_free_i64(t);
+        }
+        tcg_temp_free_ptr(ptr);
+    }
+}
+
 bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
 {
     /* Suppress goto_tb if requested. */
@@ -56,6 +80,7 @@  void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
 
     /* Start translating.  */
     gen_tb_start(db->tb);
+    gen_tb_exec_count(tb);
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f6de79a6b4..20e7835ff0 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -2,6 +2,7 @@ 
 #define GEN_ICOUNT_H
 
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 87ee3d902e..fa71eb6f0c 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -11,6 +11,9 @@ 
 #ifndef TB_STATS_FLAGS
 #define TB_STATS_FLAGS
 
+#define TB_NOTHING    (1 << 0)
+#define TB_EXEC_STATS (1 << 1)
+
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
@@ -18,4 +21,6 @@  void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
+uint32_t get_default_tbstats_flag(void);
+
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index b519465665..eb1fa92a4e 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,9 @@ 
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#define tb_stats_enabled(tb, JIT_STATS) \
+    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+
 typedef struct TBStatistics TBStatistics;
 
 /*
@@ -47,6 +50,16 @@  struct TBStatistics {
     uint32_t     flags;
     /* cs_base isn't included in the hash but we do check for matches */
     target_ulong cs_base;
+
+    /* which stats are enabled for this TBStats */
+    uint32_t stats_enabled;
+
+    /* Execution stats */
+    struct {
+        unsigned long normal;
+        unsigned long atomic;
+    } executions;
+
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);